Initial work in mapping EDT and Waze incident data
Started 2017-11-28
Ideas:
# Read in libraries and set working directory
knitr::opts_chunk$set(echo = T, warning=F, message=F)
options(width = 2400, stringsAsFactors = F)
library(ggplot2)
library(tidyverse)
library(maps) # for mapping base layers
library(reshape)
library(DT) # for datatable
# devtools::install_github("ropensci/plotly") # for latest version
library(plotly) # do after ggplot2
library(sp)
library(maptools) # readShapePoly() and others
library(mapproj) # for coord_map()
library(rgdal) # for readOGR(), needed for reading in ArcM shapefiles
library(rgeos) # for gIntersection, to clip two shapefiles
library(raster)
wazedir <- "W:/SDI Pilot Projects/Waze/Working Documents"
knitr::opts_knit$set(root.dir = wazedir)
# Initial run: Start with the projected EDT and Waze data that Lia has prepared. It appears this is just the one week of August data for MD; use aa pilot, expand to other periods after. Clean these data and save as .RData. If already done, load the processed data.
if(length(grep('EDT_Waze.RData', dir())) < 1){
waz.shp <- readOGR(dsn = ".", "Waze_sample_projected")
edt.shp <- readOGR(dsn = ".", "EDT_sample_projected")
# Fix time values: convert pub_millis to usable POSIX date class
waz.shp$time <- as.POSIXct(waz.shp$pub_millis/1000, origin = "1970-01-01", tz="America/New_York")
# Convert crashdate and time in EDT to POSIX
edt.time <- paste(as.character(edt.shp$CrashDate), " ",
edt.shp$HourofDay, ":",
formatC(as.numeric(as.character(edt.shp$MinuteofDa)), width = 2, flag = "0"), sep = "")
edt.shp$time <- strptime(edt.time, format = "%Y/%m/%d %H:%M", tz="America/New_York")
# Read in county data from Census
counties <- readOGR("CensusCounty/.")
md_counties <- counties[counties$STATEFP == 24,]
# Save imported waze and EDT files as .RData for faster import after the first time
save(file = "EDT_Waze.RData", list = c("waz.shp", "edt.shp", "md_counties"))
} else { load('EDT_Waze.RData') }
# Make a MD map
mdpoly <- map('state', 'Maryland')
# all points...
points(waz.shp$longitude, waz.shp$latitude,
pch = 16,
cex = 0.5,
col = alpha('grey20', 0.1))
# all points...
points(edt.shp$GPSLong_Ne, edt.shp$GPSLat,
pch = 21,
cex = 0.5,
col = alpha('midnightblue', 0.2))
counties <- map_data("county", "maryland")
waz.shp$tooltiptext <- with(waz.shp@data, paste(city, alert_type, time, sep = "\n"))
waz.shp$tooltiptext <- sub("NA", "", waz.shp$tooltiptext)
edt.shp$tooltiptext <- with(edt.shp@data, paste(County, HighestInj, time, sep = "\n"))
edt.shp$tooltiptext <- sub("NA", "", edt.shp$tooltiptext)
map.p <- ggplot() +
ggtitle("2017-08-01 Spatial Overlay") +
geom_polygon(data = counties, aes(x = long, y = lat, group = group),
color = "white", fill = "grey80") + theme_void() +
coord_fixed(1.3) +
geom_point(data = waz.shp[format(waz.shp$time, "%d") == "01",]@data, # 2017-08-01 first
aes(
x = longitude,
y = latitude,
color = alert_type,
text = tooltiptext),
shape = 16) +
geom_point(data = edt.shp[format(edt.shp$time, "%d") == "01",]@data, # 2017-08-01 first
aes(
x = GPSLong_Ne,
y = GPSLat,
text = tooltiptext,
color = 'EDT'),
shape = 16) +
scale_color_brewer(palette = "Set1",
name = "Waze Alert Type and EDT Crash")
ggplotly(map.p, tooltip = "text", width = 1000)
# make a table of frequency of alert_type by day
freq.tab <- aggregate(uuid ~ format(time, "%d") + alert_type,
FUN = length,
data = waz.shp@data)
datatable(freq.tab,
filter = 'top',
colnames = c("Day" = 1, "Alert Type" = 2, "Count"= 3),
rownames = F,
options = list(dom = "ftip"))
# for each EDT event, find the number and distance for nearest neighbors in space and time from Waze data
ex <- edt.shp@data
distbreaks <- c(0, 0.1, 0.5, 1, 5, 10, 15, 25, 50, 100, 300, 1000)
distcounts <- vector()
for(i in 1:nrow(ex)){
daterangemin <- ex[i, 'time'] - 2*60*60
daterangemax <- ex[i, 'time'] + 2*60*60
# find waze events within 3 hrs of this crash
wx <- waz.shp[waz.shp$time > daterangemin & waz.shp$time < daterangemax,]
if(nrow(wx) ==0) {
b <- rep(0, length(distbreaks))
} else {
w_sp <- SpatialPoints(coords = data.frame(wx$longitude, wx$latitude))
e_sp <- SpatialPoints(coords = data.frame(ex[i,'GPSLong_Ne'], ex[i,'GPSLat']))
t1 <- spDists(w_sp, e_sp, longlat = T) # distance in km to each edt event
# Sum of events with 0.1 to 100 mi in various chunks
b <- hist(t1, breaks = distbreaks, plot = F)$counts
}
distcounts <- rbind(distcounts, b)
# Sum of events within 1 to 120 minutes
}
colnames(distcounts) = distbreaks[2:length(distbreaks)]
rownames(distcounts) = ex$ID
dm <- melt(distcounts)
colnames(dm) <- c("ID", "Bin", "Count")
gp <- ggplot(dm) + geom_histogram(aes(x = Count)) + facet_wrap(~Bin) + ylab("Frequency") + xlab("Count of Waze Events")
ggplotly(gp, width = 1000)